> source('~/research/glp/replication/results.R', echo=TRUE)

> #### Setup
> rm(list=ls())

> load("posts/minreg6.RData")

> library(coda)

> #### Probabilities
> prob <- apply(glp.fit$p, 2, mean)

> ## Reshape into vars by K matrix
> K <- glp.fit$K

> prob.m <- matrix(NA, nrow=length(prob)/K, ncol=K)

> labels <- character(length(prob)/K)

> cur.m <- 1

> cur.p <- 1

> cur.l <- 1

> for (l in glp.fit$Lj) {
+   prob.m[cur.m:(cur.m+l-1), ] <- prob[cur.p:(cur.p + l * K - 1)]
+   labels[cur.m:(cur.m+l-1)] <- paste(colnames(glp.fit$x .... [TRUNCATED] 

> ### Figure 2
> ## Greyscale heatmap
> heatmap(prob.m, NA, NA, col=grey(seq(1, 0, -0.01)), scale="none",
+         labRow=labels, margins=c(2, 20))

> ### Figure 1
> ### Group probabilities
> 
> ## Build a group probabilities matrix (n X K)
> 
> group <- apply(glp.fit$g, 2, mean)

> group.m <- t(matrix(group, nrow=K))

> ## Calculate baseline probability of being in each group
> group.prob <- apply(group.m, 2, mean)

> ## Produce membership histograms
> par(mfrow=c(3,3))

> library(MASS)

> for (i in 1:K)
+   truehist(group.m[,i], xlab=paste("Group", i))

> par(mfrow=c(1,1))

> ### Table 12
> ### Now, for each group, figure out which characteristics provide the
> ### most information that you belong to the group.
> 
> ## Fi .... [TRUNCATED] 

> ## P(x_{ij} = l_j) = sum[ P(x_{ij} = l_j|G_i=k) * P(G_i=k) ]
> prob.trait <- apply(p.tmp, 1, sum)

> memb.p <- p.tmp / prob.trait

> ## Now find most extreme traits for each group
> dists.0 <- t(t(memb.p) - group.prob)

> dists <- abs(dists.0)

> info.traits <- data.frame(a=memb.p[,1])

> for (k in 1:K) {
+   oo <- order(dists[,k], decreasing=T)
+   tmp <- data.frame(a = labels[oo], b=memb.p[oo,k], c=prob.m[oo,k],
+                    .... [TRUNCATED] 

> info.traits <- info.traits[,-1]

> #### In-sample accuracy (numbers in text on pg 47)
> sum <- matrix(0, nrow=nrow(group.m), ncol=nrow(prob.m))

> for (i in 1:ncol(group.m))
+   sum <- sum + matrix(group.m[,i], ncol=1) %*% matrix(prob.m[,i], nrow=1)

> cur <- 1

> Lj <- sapply(glp.fit$Lj, function (L) {
+   tmp <- seq(cur, length.out=L)
+   cur <<- tmp[length(tmp)] + 1
+   tmp
+ })

> choice <- matrix(NA, nrow=nrow(sum), ncol=length(Lj))

> for (j in 1:length(Lj))
+   choice[,j] <- sapply(1:nrow(sum), function (i)
+                                       which.max(sum[i, Lj[[j]]]))

> correct <- glp.fit$x.nom == choice

> apply(correct, 2, mean, na.rm=T) 
  glp_offc1  glp_gender glp_marital         ses         age         edu  glp_region 
  0.7726295   0.8098343   0.9092236   0.7755068   0.4746025   0.5664561   0.5847458 

> Mode <- function (x) {
+   ux <- na.omit(unique(x))
+   ux[which.max(tabulate(match(x, ux)))]
+ }

> modal <- apply(glp.fit$x.nom, 2, Mode)

> modal.correct <- t(t(glp.fit$x.nom) == modal)

> ## Improvement over modal guessing
> tmp <- rbind(apply(correct, 2, mean, na.rm=T),
+              apply(modal.correct, 2, mean, na.rm=T))

> acc <- data.frame(rbind(tmp, tmp[1,]-tmp[2,]))

> acc # first row in model pct correct, second modal guess, third difference, by variable
  glp_offc1 glp_gender glp_marital       ses       age        edu glp_region
1 0.7726295  0.8098343   0.9092236 0.7755068 0.4746025 0.56645612  0.5847458
2 0.7726295  0.8098343   0.9092236 0.5584117 0.2878470 0.48276441  0.2786640
3 0.0000000  0.0000000   0.0000000 0.2170951 0.1867555 0.08369171  0.3060818

> apply(acc, 1, mean) # Avg accuracy, modal accuracy, improvement in accuracy
[1] 0.6989998 0.5856249 0.1133749
> 